##loading the data and libraries 
library(tidyverse)
library(janitor)

neighbourhood_rating <- read_csv("../raw_data/neighbourhood_rating.csv") %>% 
  clean_names()
Rows: 38055 Columns: 13── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (11): FeatureCode, Measurement, Units, Neighbourhood rating, Gender, Urban Rural Classification, SIMD quintiles, Typ...
dbl  (2): DateCode, Value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
green_spaces <- read_csv("../raw_data/green_spaces.csv") %>% 
  clean_names()
Rows: 38451 Columns: 13── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (11): FeatureCode, Measurement, Units, Distance to Nearest Green or Blue Space, Age, Gender, Urban Rural Classificat...
dbl  (2): DateCode, Value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
community_belonging <-read_csv("../raw_data/community_belonging.csv") %>% 
  clean_names()
Rows: 43611 Columns: 13── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (11): FeatureCode, Measurement, Units, Community belonging, Gender, Urban Rural Classification, SIMD quintiles, Type...
dbl  (2): DateCode, Value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
neighbourhood_rating
green_spaces
community_belonging

#Great, no missing values

#checking missing values 
green_spaces %>%
  summarise(count = sum(is.na(green_spaces)))

community_belonging %>% 
  summarise(count = sum(is.na(community_belonging)))

neighbourhood_rating %>% 
  summarise(count = sum(is.na(neighbourhood_rating)))
neighbourhood_rating %>% 
  count(date_code)
neighbourhood_rating %>% 
  count(walking_distance_to_nearest_greenspace)
neighbourhood_rating %>% 
  count(type_of_tenure)
neighbourhood_rating %>% 
  count(type_of_tenure)
neighbourhood_rating %>% 
  count(measurement)
neighbourhood_rating %>% 
  count(units)
neighbourhood_rating %>% 
  count(simd_quintiles)
# Tally rows per measurement type in the community belonging data. The table
# loaded above is named `community_belonging`; no object called `community`
# exists, which is why the original call failed.
community_belonging %>%
  count(measurement)
#council areas codes 
council_areas <- read_csv("../raw_data/967937c4-8d67-4f39-974f-fd58c4acfda5.csv") %>% 
  clean_names()
Rows: 44 Columns: 14── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): CA, CAName, HSCP, HSCPName, HB, HBName, Country
dbl (7): _id, CADateEnacted, CADateArchived, HSCPDateEnacted, HSCPDateArchived, HBDateEnacted, HBDateArchived
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
council_areas<- council_areas %>% 
  select(ca, ca_name)
council_areas %>% 
  rename(feature_code = ca)
green_spaces_joined <- inner_join(
  green_spaces, council_areas, by = c("feature_code" = "ca"))
green_spaces_joined
community_belonging_joined <- inner_join(
  community_belonging, council_areas, by = c("feature_code" = "ca")) 
community_belonging_joined
neighbourhood_rating_joined <-  inner_join(
  neighbourhood_rating, council_areas, by = c("feature_code" = "ca"))
neighbourhood_rating_joined
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "More than 10 minutes") %>% 
  count(value)
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "Less than 10 minutes") %>% 
  count(neighbourhood_rating)
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "More than 10 minutes") %>% 
  count(neighbourhood_rating)
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "Less than 10 minutes") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "More than 10 minutes") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  filter(date_code == "2019",
    walking_distance_to_nearest_greenspace == "Less than 10 minutes") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  filter(date_code == "2019",
    walking_distance_to_nearest_greenspace == "More than 10 minutes") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  filter(date_code == "2019") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  filter(date_code == "2018") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  count(date_code)
neighbourhood_rating_joined %>% 
  filter(date_code == "2017") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
neighbourhood_rating_joined %>% 
  filter(date_code == "2016") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
green_spaces_joined %>% 
  count(gender)

##Are there certain groups that have/ lack local access to green space?

green_spaces_joined
green_spaces_joined %>% 
  count(distance_to_nearest_green_or_blue_space)
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less") %>% 
  count(gender)
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "An 11 minute walk or more") %>% 
  count(gender)
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "Within a 6-10 minute walk") %>% 
  count(gender)
#age access
green_spaces_joined %>% 
  count(age)
#age access
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less",
         date_code == "2018") %>% 
  count(age)
#age access
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "An 11 minute walk or more") %>% 
  count(age)
green_spaces_joined
#simd_quintiles
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less", 
         date_code == "2013") %>% 
  count(simd_quintiles)
#simd_quintiles
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "An 11 minute walk or more") %>% 
  count(simd_quintiles)
green_spaces_joined
#simd_quintiles
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less", 
         date_code == "2019") %>% 
  count(simd_quintiles)

green_spaces_joined %>% 
  mutate(age = factor(age, levels = c("16-34 years", "35-64 years",
                                      "65 years and over", "All")),
         simd_quintiles = factor(simd_quintiles, levels = c("20% most deprived",
                                                            "80% least deprived",
                                                            "All")),
         distance_to_nearest_green_or_blue_space = factor(
           distance_to_nearest_green_or_blue_space, levels = c(
             "A 5 minute walk or less",
             "Within a 6-10 minute walk",
             "An 11 minute walk or more",
             "Don't Know"
           )
         )) 
#plotting simd_quintiles for every year to see if there're significant 
#differences for each year
#Filtering out Simd_quintiles == "All"
green_spaces_joined %>% 
  filter(simd_quintiles != "All") %>% 
ggplot(aes(x = distance_to_nearest_green_or_blue_space, fill = simd_quintiles))+
  geom_bar(position = "dodge")+
  facet_wrap(~date_code)

#plotting only the rows where measurement is "Percent", one panel per year
green_spaces_joined %>% 
  filter(measurement == "Percent") %>% 
ggplot(aes(x = simd_quintiles, fill = distance_to_nearest_green_or_blue_space))+
  geom_bar(position = "dodge")+
  facet_wrap(~date_code)

#plotting walking distance by SIMD group across all years combined
green_spaces_joined %>% 
ggplot(aes(x = distance_to_nearest_green_or_blue_space, fill = simd_quintiles))+
  geom_bar(position = "dodge")

green_spaces_joined %>% 
    group_by(age, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = age, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
geom_col(position = "dodge")
`summarise()` has grouped output by 'age'. You can override using the `.groups` argument.

green_spaces_joined %>% 
    #filter(measurement == "Percent") %>% 
    group_by(ca_name, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = ca_name, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+ 
           geom_col(position = "dodge")+
  facet_wrap(~ca_name)
`summarise()` has grouped output by 'ca_name'. You can override using the `.groups` argument.

#SIMD quintiles and distance to green spaces
green_spaces_joined %>%
  filter(simd_quintiles != "All") %>% 
    group_by(simd_quintiles, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = simd_quintiles, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'simd_quintiles'. You can override using the `.groups` argument.

green_spaces_joined %>% 
    #filter(measurement == "Percent") %>% 
    group_by(, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = simd_quintiles, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
Error in FUN(X[[i]], ...) : object 'simd_quintiles' not found

#subsetting the All
green_spaces_joined %>% 
    #filter(measurement == "Percent") %>% 
    group_by(simd_quintiles, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(subset(green_spaces_joined, simd_quintiles %in% c("All")))+
  geom_col(aes(simd_quintiles, mean_percentage, fill = distance_to_nearest_green_or_blue_space))
`summarise()` has grouped output by 'simd_quintiles'. You can override using the `.groups` argument.
Error in `ggplot()`:
! Mapping should be created with `aes()` or `aes_()`.
Backtrace:
 1. ... %>% ...
 3. ggplot2:::ggplot.default(., subset(green_spaces_joined, simd_quintiles %in% c("All")))
green_spaces_joined %>% 
    #filter(measurement == "Percent") %>% 
    group_by(gender, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = gender, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'gender'. You can override using the `.groups` argument.

green_spaces_joined %>% 
    group_by(urban_rural_classification, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = urban_rural_classification, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'urban_rural_classification'. You can override using the `.groups` argument.

#type of tenure
green_spaces_joined %>% 
 filter(type_of_tenure != "All") %>% 
     group_by(type_of_tenure, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = type_of_tenure, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'type_of_tenure'. You can override using the `.groups` argument.

#household type
green_spaces_joined %>% 
  filter(household_type != "All") %>% 
    group_by(household_type, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = household_type, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'household_type'. You can override using the `.groups` argument.

#ethnicity
green_spaces_joined %>% 
    group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = ethnicity, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'ethnicity'. You can override using the `.groups` argument.

#ethnicity and trying to subset "All"
green_spaces_joined %>% 
    group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(subset(ethnicity %in% "All"), aes(x = ethnicity, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'ethnicity'. You can override using the `.groups` argument.Error in ethnicity %in% "All" : object 'ethnicity' not found
   ggplot(subset(green_spaces_joined,
                 ethnicity %in% "All"), aes(x = ethnicity, y = value, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")

#ethnicity
green_spaces_joined %>% 
    group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = ethnicity(-c("All")), 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'ethnicity'. You can override using the `.groups` argument.Error in ethnicity(-c("All")) : could not find function "ethnicity"

#ethnicity
green_spaces_joined %>% 
    group_by(ethnicity(-c("All")), distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = ethnicity, 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
Error in `group_by()`:
! Problem adding computed columns.
Caused by error in `mutate()`:
! Problem while computing `..1 = ethnicity(-c("All"))`.
Caused by error in `ethnicity()`:
! could not find function "ethnicity"
Backtrace:
  1. ... %>% ...
  5. dplyr:::group_by.data.frame(., ethnicity(-c("All")), distance_to_nearest_green_or_blue_space)
  6. dplyr::group_by_prepare(.data, ..., .add = .add, caller_env = caller_env())
  7. dplyr:::add_computed_columns(...)
  9. dplyr:::mutate_cols(...)
 11. mask$eval_all_mutate(quo)
#ethnicity
green_spaces_joined %>% 
  filter(ethnicity != "All") %>% 
    group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = ethnicity, 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'ethnicity'. You can override using the `.groups` argument.

#ethnicity
green_spaces_joined %>% 
 filter(ethnicity != "All") %>% 
    group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = ethnicity, 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'ethnicity'. You can override using the `.groups` argument.

green_spaces_joined
#gender and distance to green spaces
green_spaces_joined %>% 
 filter(gender != "All") %>% 
    group_by(gender, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = gender, 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'gender'. You can override using the `.groups` argument.

#urban/rural classification and distance to green spaces
green_spaces_joined %>% 
 filter(urban_rural_classification != "All") %>% 
    group_by(urban_rural_classification, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = urban_rural_classification, 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'urban_rural_classification'. You can override using the `.groups` argument.

#working on neighbourhood rating

neighbourhood_rating_joined
community_belonging_joined
neighbourhood_rating_joined %>% 
  count(neighbourhood_rating)

#Interesting data. I may need to bin the ratings into "good" and "poor" so that the difference is more evident, or else plot the two groups in separate plots.

neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace != "All") %>% 
    group_by(neighbourhood_rating, walking_distance_to_nearest_greenspace) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = neighbourhood_rating, 
             y = mean_percentage, 
             fill = walking_distance_to_nearest_greenspace))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'neighbourhood_rating'. You can override using the `.groups` argument.

neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace != "All", 
         neighbourhood_rating == "Very good") %>% 
    group_by(neighbourhood_rating, walking_distance_to_nearest_greenspace) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = neighbourhood_rating, 
             y = mean_percentage, 
             fill = walking_distance_to_nearest_greenspace))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'neighbourhood_rating'. You can override using the `.groups` argument.

neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace != "All", 
         neighbourhood_rating == "Very poor") %>% 
    group_by(neighbourhood_rating, walking_distance_to_nearest_greenspace) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = neighbourhood_rating, 
             y = mean_percentage, 
             fill = walking_distance_to_nearest_greenspace))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'neighbourhood_rating'. You can override using the `.groups` argument.

#binning into good and very good and poor and very poor

#urban/ rural areas and distance to green spaces
green_spaces_joined %>% 
 filter(urban_rural_classification != "All") %>% 
    group_by(urban_rural_classification, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = urban_rural_classification, 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'urban_rural_classification'. You can override using the `.groups` argument.

#urban/ rural areas and distance to green spaces
green_spaces_joined %>% 
 filter(urban_rural_classification != "All", 
        measurement == "Percent") %>% 
    group_by(urban_rural_classification, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = urban_rural_classification, 
             y = mean_percentage, 
             fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
`summarise()` has grouped output by 'urban_rural_classification'. You can override using the `.groups` argument.

#let’s do hypothesis testing on urban and rural

green_spaces_joined
green_spaces_joined %>% 
  count(measurement)

#are there differences in green spaces access between rural or urban

---
title: "R Notebook"
output: html_notebook
---

```{r}
##loading the data and libraries 
library(tidyverse)
library(janitor)

# Read each raw CSV extract and pass it through janitor::clean_names() so the
# column names used later (e.g. date_code, feature_code) follow one convention.
neighbourhood_rating <- clean_names(
  read_csv("../raw_data/neighbourhood_rating.csv")
)
green_spaces <- clean_names(
  read_csv("../raw_data/green_spaces.csv")
)
community_belonging <- clean_names(
  read_csv("../raw_data/community_belonging.csv")
)
```
```{r}
neighbourhood_rating
green_spaces
community_belonging
```


Great: the check below finds no missing values.
```{r}
# Count the NA cells in each dataset. `.` is the data frame supplied by the
# pipe, so each check inspects the table it is piped from. Every result is a
# one-row tibble with a single `count` column.
green_spaces %>%
  summarise(count = sum(is.na(.)))

community_belonging %>%
  summarise(count = sum(is.na(.)))

neighbourhood_rating %>%
  summarise(count = sum(is.na(.)))
```


```{r}
neighbourhood_rating %>% 
  count(date_code)
```
```{r}
neighbourhood_rating %>% 
  count(walking_distance_to_nearest_greenspace)
```
```{r}
neighbourhood_rating %>% 
  count(type_of_tenure)
```


```{r}
neighbourhood_rating %>% 
  count(type_of_tenure)
```
```{r}
neighbourhood_rating %>% 
  count(measurement)
```
```{r}
neighbourhood_rating %>% 
  count(units)
```

```{r}
neighbourhood_rating %>% 
  count(simd_quintiles)
```



```{r}
green_spaces %>% 
  count(date_code)

green_spaces %>% 
  count(measurement)

green_spaces %>% 
  count(distance_to_nearest_green_or_blue_space)
```


```{r}
green_spaces
```

```{r}
green_spaces %>% 
  count(distance_to_nearest_green_or_blue_space)
```

```{r}
green_spaces %>% 
  count(age)
```

```{r}
green_spaces %>% 
  count(gender)
```

```{r}
green_spaces %>% 
  count(urban_rural_classification)
```

```{r}
green_spaces %>% 
  count(simd_quintiles)
```

```{r}
community_belonging
```

```{r}
community_belonging %>% 
  count(community_belonging)
```
```{r}
community_belonging %>% 
  count(feature_code)
```

```{r}
#council areas codes 
council_areas <- read_csv("../raw_data/967937c4-8d67-4f39-974f-fd58c4acfda5.csv") %>% 
  clean_names()

```

```{r}
council_areas<- council_areas %>% 
  select(ca, ca_name)
```
```{r}
# Preview only: the renamed table is printed but not assigned, so
# `council_areas` keeps its `ca` column, which the joins below use as their key.
rename(council_areas, feature_code = ca)
```

```{r}
# Attach the council area name to every green-space row by matching
# `feature_code` against the council area code `ca`. inner_join() keeps only
# rows whose code is present in `council_areas`.
green_spaces_joined <- green_spaces %>%
  inner_join(council_areas, by = c("feature_code" = "ca"))
green_spaces_joined
```

```{r}

```

```{r}
council_areas
```
```{r}
green_spaces
```

```{r}
green_spaces_joined
```

```{r}
# Attach the council area name to every community-belonging row by matching
# `feature_code` against the council area code `ca`. inner_join() keeps only
# rows whose code is present in `council_areas`.
community_belonging_joined <- community_belonging %>%
  inner_join(council_areas, by = c("feature_code" = "ca"))
community_belonging_joined
```
```{r}
# Attach the council area name to every neighbourhood-rating row by matching
# `feature_code` against the council area code `ca`. inner_join() keeps only
# rows whose code is present in `council_areas`.
neighbourhood_rating_joined <- neighbourhood_rating %>%
  inner_join(council_areas, by = c("feature_code" = "ca"))
neighbourhood_rating_joined
```
```{r}
neighbourhood_rating_joined %>% 
  count(walking_distance_to_nearest_greenspace)
```

```{r}
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "Less than 10 minutes") %>% 
  count(value)
```

```{r}
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "More than 10 minutes") %>% 
  count(value)
```

```{r}
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "Less than 10 minutes") %>% 
  count(neighbourhood_rating)
```

```{r}
neighbourhood_rating_joined %>% 
  filter(walking_distance_to_nearest_greenspace == "More than 10 minutes") %>% 
  count(neighbourhood_rating)
```

```{r}
# Apparently South Lanarkshire is the council area with the most rows for both
# "Less than 10 minutes" and "More than 10 minutes". That is probably because
# the sample simply contains more rows for South Lanarkshire, so a proportion
# may be a fairer comparison than a raw row count.
# Per capita = units / number of people in the population
# Rows per council area for "Less than 10 minutes", largest count first.
neighbourhood_rating_joined %>%
  filter(walking_distance_to_nearest_greenspace == "Less than 10 minutes") %>%
  count(ca_name, sort = TRUE)
```

```{r}
# Apparently South Lanarkshire is the council area with the most rows for both
# "Less than 10 minutes" and "More than 10 minutes". That is probably because
# the sample simply contains more rows for South Lanarkshire, so a proportion
# may be a fairer comparison than a raw row count.
# Per capita = units / number of people in the population
# Rows per council area for "More than 10 minutes", largest count first.
neighbourhood_rating_joined %>%
  filter(walking_distance_to_nearest_greenspace == "More than 10 minutes") %>%
  count(ca_name, sort = TRUE)
```

```{r}
neighbourhood_rating_joined %>% 
  count(ca_name) %>% 
  arrange(desc(n))
```

```{r}
neighbourhood_rating_joined %>% 
  filter(date_code == "2019",
    walking_distance_to_nearest_greenspace == "Less than 10 minutes") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
```
```{r}
neighbourhood_rating_joined %>% 
  filter(date_code == "2019",
    walking_distance_to_nearest_greenspace == "More than 10 minutes") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
```

```{r}
neighbourhood_rating_joined %>% 
  filter(date_code == "2019") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
```
```{r}
neighbourhood_rating_joined %>% 
  filter(date_code == "2018") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
```
```{r}
neighbourhood_rating_joined %>% 
  count(date_code)
```

```{r}
neighbourhood_rating_joined %>% 
  filter(date_code == "2017") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
```

```{r}
neighbourhood_rating_joined %>% 
  filter(date_code == "2016") %>% 
  count(ca_name) %>% 
  arrange(desc(n))
```



```{r}
neighbourhood_rating %>% 
  count(urban_rural_classification)
```

```{r}
neighbourhood_rating_joined %>% 
  count(urban_rural_classification)
```
```{r}
green_spaces_joined %>% 
  count(distance_to_nearest_green_or_blue_space)
```

```{r}
green_spaces_joined %>% 
  count(gender)
```

## Are there certain groups that have or lack local access to green space?


```{r}
green_spaces_joined
```

```{r}
green_spaces_joined %>% 
  count(distance_to_nearest_green_or_blue_space)
```

```{r}
#male/female access
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less") %>% 
  count(gender)
```
```{r}
#male/female access
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "An 11 minute walk or more") %>% 
  count(gender)
```
```{r}
#male/female access
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "Within a 6-10 minute walk") %>% 
  count(gender)
```

```{r}
#age access
green_spaces_joined %>% 
  count(age)
```

```{r}
#age access
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less",
         date_code == "2018") %>% 
  count(age)
```

```{r}
#age access
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "An 11 minute walk or more") %>% 
  count(age)
```
```{r}
green_spaces_joined
```

```{r}
#simd_quintiles
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less", 
         date_code == "2013") %>% 
  count(simd_quintiles)
```
```{r}
#simd_quintiles
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "An 11 minute walk or more") %>% 
  count(simd_quintiles)
```

```{r}
green_spaces_joined
```
```{r}
#simd_quintiles
green_spaces_joined %>% 
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less", 
         date_code == "2019") %>% 
  count(simd_quintiles)
```


```{r}
# Rows reporting "A 5 minute walk or less". The filter condition is evaluated
# row by row, so the earlier group_by(date_code) did not change which rows were
# kept and has been removed.
green_spaces_joined %>%
  filter(distance_to_nearest_green_or_blue_space == "A 5 minute walk or less")

# Number of rows per SIMD group. `fill` colours the whole bar, whereas
# `colour` only draws the bar outline.
green_spaces_joined %>%
  ggplot(aes(x = simd_quintiles, fill = simd_quintiles)) +
  geom_bar()
```

```{r}
# Give the categorical columns an explicit display order and store the result.
# Previously the mutated table was only printed, so the ordering never reached
# any later plot. fct_relevel() moves the listed levels to the front in this
# order and leaves unlisted values untouched instead of converting them to NA
# (it warns if a listed level is not present in the data).
green_spaces_joined <- green_spaces_joined %>%
  mutate(
    age = fct_relevel(
      age,
      "16-34 years", "35-64 years", "65 years and over", "All"
    ),
    simd_quintiles = fct_relevel(
      simd_quintiles,
      "20% most deprived", "80% least deprived", "All"
    ),
    distance_to_nearest_green_or_blue_space = fct_relevel(
      distance_to_nearest_green_or_blue_space,
      "A 5 minute walk or less",
      "Within a 6-10 minute walk",
      "An 11 minute walk or more",
      "Don't Know"
    )
  )
```


```{r}
#plotting simd_quintiles for every year to see if there're significant 
#differences for each year
#Filtering out Simd_quintiles == "All"
green_spaces_joined %>% 
  filter(simd_quintiles != "All") %>% 
ggplot(aes(x = distance_to_nearest_green_or_blue_space, fill = simd_quintiles))+
  geom_bar(position = "dodge")+
  facet_wrap(~date_code)
```

```{r}
# Keep only the rows whose measurement is "Percent", then count rows per SIMD
# group and distance band, with one panel per date_code (year).
green_spaces_joined %>%
  filter(measurement == "Percent") %>%
  ggplot(aes(simd_quintiles, fill = distance_to_nearest_green_or_blue_space)) +
  geom_bar(position = "dodge") +
  facet_wrap(vars(date_code))
```


```{r}
#plotting walking distance by SIMD group across all years combined
green_spaces_joined %>% 
ggplot(aes(x = distance_to_nearest_green_or_blue_space, fill = simd_quintiles))+
  geom_bar(position = "dodge")
```

```{r}
# Age group and distance to green spaces: average `value` for every
# age / distance combination, drawn as side-by-side bars.
# NOTE(review): the mean is taken over every `measurement` type; other chunks
# filter on measurement == "Percent" - confirm whether that filter belongs here.
green_spaces_joined %>%
  group_by(age, distance_to_nearest_green_or_blue_space) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(mean_percentage = mean(value), .groups = "drop") %>%
  ggplot(aes(
    x = age,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = "dodge")
```

```{r}
# Council area and distance to green spaces: average `value` for every
# council area / distance combination, one panel per council area.
# NOTE(review): the measurement == "Percent" filter was commented out in the
# original; confirm whether the mean should be restricted to those rows.
green_spaces_joined %>%
  group_by(ca_name, distance_to_nearest_green_or_blue_space) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(mean_percentage = mean(value), .groups = "drop") %>%
  ggplot(aes(
    x = ca_name,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = "dodge") +
  # Each panel holds a single council area. With the default shared x scale
  # every panel would reserve a slot for all council areas and be mostly empty,
  # so the x axis is freed per panel.
  facet_wrap(~ca_name, scales = "free_x")
```

```{r}
# SIMD quintiles and distance to green spaces: average `value` for every
# SIMD group / distance combination, excluding the aggregate "All" group.
# NOTE(review): the mean is taken over every `measurement` type; other chunks
# filter on measurement == "Percent" - confirm whether that filter belongs here.
green_spaces_joined %>%
  filter(simd_quintiles != "All") %>%
  group_by(simd_quintiles, distance_to_nearest_green_or_blue_space) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(mean_percentage = mean(value), .groups = "drop") %>%
  ggplot(aes(
    x = simd_quintiles,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = "dodge")
```

```{r}
# Average `value` for every SIMD group / distance combination, including the
# aggregate "All" group. `simd_quintiles` must be a grouping column: the
# original group_by() omitted it, so summarise() dropped the column and
# ggplot() failed with "object 'simd_quintiles' not found".
# NOTE(review): the measurement == "Percent" filter was commented out in the
# original; confirm whether the mean should be restricted to those rows.
green_spaces_joined %>%
  group_by(simd_quintiles, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value), .groups = "drop") %>%
  ggplot(aes(
    x = simd_quintiles,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = "dodge")
```



```{r}
# Plot only the aggregate "All" SIMD group. The rows are selected with
# filter() before summarising. The original passed a subset() data frame as
# the second argument of a piped ggplot(), where it was taken as the `mapping`
# and raised "Mapping should be created with `aes()`".
# NOTE(review): the measurement == "Percent" filter was commented out in the
# original; confirm whether the mean should be restricted to those rows.
green_spaces_joined %>%
  filter(simd_quintiles %in% c("All")) %>%
  group_by(simd_quintiles, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value), .groups = "drop") %>%
  ggplot(aes(
    x = simd_quintiles,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col()
```

```{r}
#gender and distance to green spaces 
green_spaces_joined %>% 
    group_by(gender, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = gender, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
```

```{r}
#urban/ rural areas and access to green spaces
green_spaces_joined %>% 
    group_by(urban_rural_classification, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = urban_rural_classification, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
```

```{r}
#type of tenure and distance to nearest green space
green_spaces_joined %>% 
 filter(type_of_tenure != "All") %>% 
     group_by(type_of_tenure, distance_to_nearest_green_or_blue_space) %>% 
    summarise(mean_percentage = mean(value)) %>% 
  ggplot(aes(x = type_of_tenure, y = mean_percentage, fill = distance_to_nearest_green_or_blue_space))+
  geom_col(position = "dodge")
```

```{r}
# Household type against distance to green spaces, leaving out the "All"
# household rows; bars show the mean of `value` for each pair.
summarise(
  group_by(
    filter(green_spaces_joined, household_type != "All"),
    household_type,
    distance_to_nearest_green_or_blue_space
  ),
  mean_percentage = mean(value)
) %>%
  ggplot(aes(
    household_type,
    mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = position_dodge())
```

```{r}
# Ethnicity (every level, including "All") against distance to green spaces:
# mean of `value` per pair as dodged bars.
summarise(
  group_by(green_spaces_joined, ethnicity, distance_to_nearest_green_or_blue_space),
  mean_percentage = mean(value)
) %>%
  ggplot() +
  geom_col(
    aes(ethnicity, mean_percentage, fill = distance_to_nearest_green_or_blue_space),
    position = position_dodge()
  )
```

```{r}
# Ethnicity, restricted to the "All" level.
# The restriction is done with filter() ahead of the aggregation. The earlier
# `ggplot(subset(ethnicity %in% "All"), aes(...))` gave subset() no data
# frame, and behind a pipe that call occupied ggplot()'s `mapping` slot,
# pushing the real aes() out of position.
green_spaces_joined %>%
  filter(ethnicity == "All") %>%
  group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot(aes(
    x = ethnicity,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = "dodge")
```

```{r}
   # Rows whose ethnicity is "All", plotted un-aggregated: raw `value` on the
   # y axis, one dodged bar position per distance band.
   green_spaces_joined %>%
     filter(ethnicity %in% "All") %>%
     ggplot() +
     geom_col(
       aes(ethnicity, value, fill = distance_to_nearest_green_or_blue_space),
       position = position_dodge()
     )
```

```{r}
# Ethnicity against distance to green spaces, without the "All" level.
# Rows are removed with filter(); the earlier `aes(x = ethnicity(-c("All")))`
# tried to call the column as if it were a function, which cannot be
# evaluated when the plot is built.
green_spaces_joined %>%
  filter(ethnicity != "All") %>%
  group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot(aes(
    x = ethnicity,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = "dodge")
```

```{r}
# Ethnicity against distance to green spaces, without the "All" level.
# The exclusion belongs in filter(): `group_by(ethnicity(-c("All")), ...)`
# treated the column as a function call, so the grouping expression could
# not be evaluated and no `ethnicity` column reached the plot.
green_spaces_joined %>%
  filter(ethnicity != "All") %>%
  group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot(aes(
    x = ethnicity,
    y = mean_percentage,
    fill = distance_to_nearest_green_or_blue_space
  )) +
  geom_col(position = "dodge")
```

```{r}
# Ethnicity (excluding "All") against distance to green spaces; bar height is
# the mean of `value` for each ethnicity / distance-band pair.
summarise(
  group_by(
    filter(green_spaces_joined, ethnicity != "All"),
    ethnicity,
    distance_to_nearest_green_or_blue_space
  ),
  mean_percentage = mean(value)
) %>%
  ggplot(aes(ethnicity, mean_percentage,
             fill = distance_to_nearest_green_or_blue_space)) +
  geom_col(position = position_dodge())
```

```{r}
# Ethnicity without the "All" rows: dodged bars of mean `value` per distance
# band. (Same computation as the preceding ethnicity chunk.)
green_spaces_joined %>%
  filter(ethnicity != "All") %>%
  group_by(ethnicity, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot() +
  geom_col(
    aes(ethnicity, mean_percentage, fill = distance_to_nearest_green_or_blue_space),
    position = position_dodge()
  )
```


```{r}
# Display `green_spaces_joined` to inspect its columns and rows.
green_spaces_joined
```

```{r}
# Gender against distance to green spaces with the "All" gender rows removed;
# bars show the mean of `value` per gender and distance band.
summarise(
  group_by(
    filter(green_spaces_joined, gender != "All"),
    gender,
    distance_to_nearest_green_or_blue_space
  ),
  mean_percentage = mean(value)
) %>%
  ggplot(aes(gender, mean_percentage,
             fill = distance_to_nearest_green_or_blue_space)) +
  geom_col(position = position_dodge())
```

```{r}
# Urban/rural classification against distance to green spaces, with the "All"
# classification rows removed; bars show the mean of `value` per pair.
green_spaces_joined %>%
  filter(urban_rural_classification != "All") %>%
  group_by(urban_rural_classification, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot() +
  geom_col(
    aes(
      urban_rural_classification,
      mean_percentage,
      fill = distance_to_nearest_green_or_blue_space
    ),
    position = position_dodge()
  )
```
# Working on neighbourhood rating
```{r}
# Display `neighbourhood_rating_joined` to inspect its columns and rows.
neighbourhood_rating_joined
```

```{r}
# Display `community_belonging_joined` to inspect its columns and rows.
community_belonging_joined
```

```{r}
# Number of rows at each level of `neighbourhood_rating`.
count(neighbourhood_rating_joined, neighbourhood_rating)
```



Interesting data. I may need to bin the ratings into good and poor so that the difference is more evident, or plot the differences in two separate plots.

```{r}
# Neighbourhood rating against walking distance to the nearest greenspace:
# mean of `value` per pair as dodged bars, "All" distance rows excluded.
summarise(
  group_by(
    filter(neighbourhood_rating_joined, walking_distance_to_nearest_greenspace != "All"),
    neighbourhood_rating,
    walking_distance_to_nearest_greenspace
  ),
  mean_percentage = mean(value)
) %>%
  ggplot(aes(neighbourhood_rating, mean_percentage,
             fill = walking_distance_to_nearest_greenspace)) +
  geom_col(position = position_dodge())
```
```{r}
# "Very good" ratings only, split by walking distance to the nearest
# greenspace ("All" distance rows excluded); bars show the mean of `value`.
neighbourhood_rating_joined %>%
  filter(
    walking_distance_to_nearest_greenspace != "All" &
      neighbourhood_rating == "Very good"
  ) %>%
  group_by(neighbourhood_rating, walking_distance_to_nearest_greenspace) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot() +
  geom_col(
    aes(neighbourhood_rating, mean_percentage,
        fill = walking_distance_to_nearest_greenspace),
    position = position_dodge()
  )
```

```{r}
# "Very poor" ratings only, split by walking distance to the nearest
# greenspace ("All" distance rows excluded); bars show the mean of `value`.
neighbourhood_rating_joined %>%
  filter(
    walking_distance_to_nearest_greenspace != "All" &
      neighbourhood_rating == "Very poor"
  ) %>%
  group_by(neighbourhood_rating, walking_distance_to_nearest_greenspace) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot() +
  geom_col(
    aes(neighbourhood_rating, mean_percentage,
        fill = walking_distance_to_nearest_greenspace),
    position = position_dodge()
  )
```

Binning the ratings into two groups: good together with very good, and poor together with very poor.


```{r}
# Urban/rural areas against distance to green spaces ("All" classification
# rows excluded): mean of `value` per pair as dodged bars.
# NOTE(review): no filter on `measurement` here, so the mean pools whatever
# measurement types the table holds - confirm that is intended.
summarise(
  group_by(
    filter(green_spaces_joined, urban_rural_classification != "All"),
    urban_rural_classification,
    distance_to_nearest_green_or_blue_space
  ),
  mean_percentage = mean(value)
) %>%
  ggplot(aes(urban_rural_classification, mean_percentage,
             fill = distance_to_nearest_green_or_blue_space)) +
  geom_col(position = position_dodge())
```
```{r}
# Urban/rural areas against distance to green spaces, keeping only rows whose
# measurement is "Percent" and dropping the "All" classification rows.
green_spaces_joined %>%
  filter(urban_rural_classification != "All" & measurement == "Percent") %>%
  group_by(urban_rural_classification, distance_to_nearest_green_or_blue_space) %>%
  summarise(mean_percentage = mean(value)) %>%
  ggplot() +
  geom_col(
    aes(
      urban_rural_classification,
      mean_percentage,
      fill = distance_to_nearest_green_or_blue_space
    ),
    position = position_dodge()
  )
```



# Hypothesis testing: urban versus rural

```{r}
# Display `green_spaces_joined` again before setting up the test.
green_spaces_joined
```
```{r}
# Number of rows for each value of `measurement`.
count(green_spaces_joined, measurement)
```
Are there differences in access to green spaces between rural and urban areas?

```{r}

```




